{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pyreadstat import pyreadstat\n",
    "import scipy.stats as stats\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the cleaned questionnaire data from the SPSS file.\n",
    "# A forward slash is used as the separator so the relative path resolves on\n",
    "# Windows as well as on macOS/Linux (a backslash is only a separator on Windows).\n",
    "# apply_value_formats / formats_as_ordered_category ask pyreadstat to return\n",
    "# labelled values as ordered categoricals rather than raw codes.\n",
    "df, metadata = pyreadstat.read_sav(\n",
    "    'data/indentity问卷数据数据清理后.sav',\n",
    "    apply_value_formats=True,\n",
    "    formats_as_ordered_category=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Frequency table of the '政治面貌' column; missing values are left out\n",
    "# (dropna=True is the pandas default, spelled out here for the reader).\n",
    "result = df['政治面貌'].value_counts(dropna=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "106"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows whose '政治面貌' value is '党员' (label-based lookup).\n",
    "result.loc['党员']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "823"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total number of rows in the data set.\n",
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.10591156047528699, 0.15168260720393534)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# proportion_confint takes the number of cases in the category as its first\n",
    "# argument and the total number of observations as its second.\n",
    "# Both are read from the data instead of being typed in by hand, so the\n",
    "# interval stays correct if the data file changes.\n",
    "sm.stats.proportion_confint(result['党员'], df.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def propci(df, col, cat_label, alpha=0.95):\n",
    "    \"\"\"Report the share of one category of a categorical column with its confidence interval.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Data containing the column.\n",
    "    col : str\n",
    "        Name of the categorical column.\n",
    "    cat_label : hashable\n",
    "        Category whose proportion is reported. It must be present in\n",
    "        ``df[col].value_counts()``; otherwise the lookup raises KeyError.\n",
    "    alpha : float, default 0.95\n",
    "        Confidence level of the interval (e.g. 0.95 for a 95% interval),\n",
    "        strictly between 0 and 1.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        Sentence (in Chinese) giving the percentage and the interval bounds,\n",
    "        each with one decimal place.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``alpha`` is not strictly between 0 and 1.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    The denominator is the total number of rows of ``df``, so rows where\n",
    "    ``col`` is missing count towards the denominator but never the numerator.\n",
    "    \"\"\"\n",
    "    if not 0 < alpha < 1:\n",
    "        raise ValueError(f'alpha must be a confidence level between 0 and 1, got {alpha!r}')\n",
    "    n_total = df.shape[0]\n",
    "    count = df[col].value_counts()[cat_label]\n",
    "    prop = count / n_total * 100\n",
    "    # statsmodels expects the significance level, i.e. 1 - confidence level.\n",
    "    low, high = sm.stats.proportion_confint(count, n_total, alpha=1 - alpha)\n",
    "    return F'变量{col}中类别{cat_label}的比例是{prop:.1f}%，{alpha * 100:g}%置信区间为[{low*100:.1f}%,{high*100:.1f}%]'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'变量政治面貌中类别党员的比例是12.9%，95%置信区间为[10.6%,15.2%]'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Share of the '党员' category within '政治面貌', with its confidence interval.\n",
    "propci(df, col='政治面貌', cat_label='党员')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'变量年级中类别大一的比例是34.6%，95%置信区间为[31.4%,37.9%]'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Share of the '大一' category within '年级', with its confidence interval.\n",
    "propci(df, col='年级', cat_label='大一')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7.374140147755115, 7.748581601941117)\n"
     ]
    }
   ],
   "source": [
    "# t-based confidence interval for the mean of the '认知维度' column.\n",
    "data = df['认知维度']\n",
    "alpha = 0.95  # confidence level handed to stats.t.interval\n",
    "df1 = data.shape[0] - 1  # degrees of freedom: n - 1\n",
    "ci = stats.t.interval(alpha, df1, loc=data.mean(), scale=stats.sem(data))\n",
    "print(ci)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def meanci(df, col, alpha=0.95):\n",
    "    \"\"\"Report the mean, sample SD and t-based confidence interval of a numeric column.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Data containing the column.\n",
    "    col : str\n",
    "        Name of the numeric column.\n",
    "    alpha : float, default 0.95\n",
    "        Confidence level of the interval (e.g. 0.95 for a 95% interval),\n",
    "        strictly between 0 and 1.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        Sentence (in Chinese) giving the mean, the SD and the interval bounds,\n",
    "        each with two decimal places.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``alpha`` is not strictly between 0 and 1.\n",
    "    \"\"\"\n",
    "    if not 0 < alpha < 1:\n",
    "        raise ValueError(f'alpha must be a confidence level between 0 and 1, got {alpha!r}')\n",
    "    # Drop missing values so that n, the SD and the standard error are all\n",
    "    # computed from the same observations (stats.sem would otherwise return nan).\n",
    "    data = df[col].dropna()\n",
    "    dof = len(data) - 1\n",
    "    mean = data.mean()\n",
    "    # ddof=1 gives the sample SD, the same estimator stats.sem uses below;\n",
    "    # np.std on a Series would silently use ddof=0.\n",
    "    std = data.std(ddof=1)\n",
    "    # The caller's alpha is honoured here (it is no longer overwritten with 0.95).\n",
    "    low, high = stats.t.interval(alpha, dof, loc=mean, scale=stats.sem(data))\n",
    "    return F'变量{col}的均值是{mean:.2f}(SD={std:.2f})，{alpha * 100:g}%置信区间为[{low:.2f},{high:.2f}]'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'变量认知维度的均值是7.56(SD=2.73)，95%置信区间为[7.37,7.75]'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean, SD and confidence interval of the '认知维度' column.\n",
    "meanci(df, col='认知维度')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "9650cb4e16cdd4a8e8e2d128bf38d875813998db22a3c986335f89e0cb4d7bb2"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
